# Pull 2017 daily precipitation and min/max temperature records for three
# NOAA stations, attach a readable station name, and rescale temperatures.
weather_df <-
  rnoaa::meteo_pull_monitors(
    c("USW00094728", "USC00519397", "USS0023B17S"),
    date_min = "2017-01-01",
    date_max = "2017-12-31",
    var = c("PRCP", "TMIN", "TMAX")
  ) %>%
  mutate(
    # Map each station id to a human-readable label.
    name = recode(
      id,
      USW00094728 = "CentralPark_NY",
      USC00519397 = "Waikiki_HA",
      USS0023B17S = "Waterhole_WA"
    )
  ) %>%
  mutate(
    # Raw temperatures are divided by 10 (presumably stored in tenths of a
    # degree C -- confirm against the GHCND documentation).
    tmin = tmin / 10,
    tmax = tmax / 10
  ) %>%
  # Put the identifying columns first, followed by everything else.
  select(name, id, everything())
## file path: /Users/SigL/Library/Caches/rnoaa/ghcnd/USW00094728.dly
## file last updated: 2019-09-26 10:25:27
## file min/max dates: 1869-01-01 / 2019-09-30
## file path: /Users/SigL/Library/Caches/rnoaa/ghcnd/USC00519397.dly
## file last updated: 2019-09-26 10:25:41
## file min/max dates: 1965-01-01 / 2019-09-30
## file path: /Users/SigL/Library/Caches/rnoaa/ghcnd/USS0023B17S.dly
## file last updated: 2019-09-26 10:25:46
## file min/max dates: 1999-09-01 / 2019-09-30
# Print the tibble to check the recoded station names and rescaled temperatures.
weather_df
## # A tibble: 1,095 x 6
## name id date prcp tmax tmin
## <chr> <chr> <date> <dbl> <dbl> <dbl>
## 1 CentralPark_NY USW00094728 2017-01-01 0 8.9 4.4
## 2 CentralPark_NY USW00094728 2017-01-02 53 5 2.8
## 3 CentralPark_NY USW00094728 2017-01-03 147 6.1 3.9
## 4 CentralPark_NY USW00094728 2017-01-04 0 11.1 1.1
## 5 CentralPark_NY USW00094728 2017-01-05 0 1.1 -2.7
## 6 CentralPark_NY USW00094728 2017-01-06 13 0.6 -3.8
## 7 CentralPark_NY USW00094728 2017-01-07 81 -3.2 -6.6
## 8 CentralPark_NY USW00094728 2017-01-08 0 -3.8 -8.8
## 9 CentralPark_NY USW00094728 2017-01-09 0 -4.9 -9.9
## 10 CentralPark_NY USW00094728 2017-01-10 0 7.8 -6
## # … with 1,085 more rows
Cache: save the dataset to a local directory so that it is not re-downloaded every time.
# Scatterplot of daily maximum vs. minimum temperature, colored by station.
ggplot(weather_df, aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5)
## Warning: Removed 15 rows containing missing values (geom_point).
captions and titles
# Same scatterplot, now with a title, axis labels, and a caption.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum Temp (C)",  # fixed typo: was "Minumum"
    y = "Maximum Temp (C)",
    caption = "Data from the rnoaa package"
  )
## Warning: Removed 15 rows containing missing values (geom_point).
Ticks and labels (customizing the scale shown on an axis)
# Scatterplot with hand-picked x-axis tick positions and custom tick labels.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax)) +
  # Color is mapped on the geom only, so the global aesthetics stay x/y.
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",  # fixed typo: was "Maxiumum"
    caption = "Data from the rnoaa package"
  ) +
  scale_x_continuous(
    # `breaks` and `labels` are matched by position.
    breaks = c(-15, 0, 15),
    labels = c("-15 (too cold)", "0", "15")
  )
## Warning: Removed 15 rows containing missing values (geom_point).
Square-root transformation (`trans = "sqrt"`) of the y-axis
# Scatterplot with restricted x-axis limits and a square-root-transformed
# y-axis drawn on the right-hand side.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",  # fixed typo: was "Maxiumum"
    caption = "Data from the rnoaa package"
  ) +
  scale_x_continuous(
    breaks = c(-15, 0, 15),
    labels = c("-15ºC", "0", "15"),
    # Points with tmin outside these limits are dropped from the plot.
    limits = c(-20, 30)
  ) +
  scale_y_continuous(
    # sqrt is undefined for negative tmax, so those rows become NaN and are
    # removed with a warning; kept as-is because it demonstrates the transform.
    trans = "sqrt",
    position = "right"
  )
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 90 rows containing missing values (geom_point).
# Scatterplot with a renamed color legend and a restricted hue range.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",  # fixed typo: was "Maxiumum"
    caption = "Data from the rnoaa package"
  ) +
  scale_color_hue(
    name = "Location",  # legend title
    h = c(100, 300)     # range of hues the colors are drawn from
  )
## Warning: Removed 15 rows containing missing values (geom_point).
# Base scatterplot, stored so that different themes can be layered on later.
# Uses the viridis palette for the station colors.
ggp_base <- weather_df %>%
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",  # fixed typo: was "Maxiumum"
    caption = "Data from the rnoaa package"
  ) +
  viridis::scale_color_viridis(
    name = "Location",
    # `name` is a categorical variable, so request the discrete palette.
    discrete = TRUE
  )
# Black-and-white theme, with the legend moved below the plot.
ggp_base + theme_bw() + theme(legend.position = "bottom")
## Warning: Removed 15 rows containing missing values (geom_point).
# Order matters: apply the complete theme first, then the legend tweak on top.
ggp_base +
  theme_minimal() +
  theme(legend.position = "bottom")
## Warning: Removed 15 rows containing missing values (geom_point).
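`theme_bw()` is the black-and-white theme. A complete theme such as `theme_bw()` or `theme_minimal()` has to go first: it resets every theme setting, so if it comes after `theme(legend.position = "bottom")` the legend position will not change to “bottom”.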
(Where do my warnings come from?)
# Show the rows whose minimum temperature is missing; these are the rows the
# plots above drop with a warning.
filter(weather_df, is.na(tmin))
## # A tibble: 15 x 6
## name id date prcp tmax tmin
## <chr> <chr> <date> <dbl> <dbl> <dbl>
## 1 Waikiki_HA USC00519397 2017-04-17 5 28.3 NA
## 2 Waikiki_HA USC00519397 2017-05-09 NA NA NA
## 3 Waikiki_HA USC00519397 2017-05-26 NA NA NA
## 4 Waikiki_HA USC00519397 2017-07-19 NA NA NA
## 5 Waikiki_HA USC00519397 2017-10-07 0 31.1 NA
## 6 Waikiki_HA USC00519397 2017-10-09 0 28.9 NA
## 7 Waikiki_HA USC00519397 2017-10-10 10 31.7 NA
## 8 Waikiki_HA USC00519397 2017-10-12 0 31.1 NA
## 9 Waikiki_HA USC00519397 2017-10-13 0 31.1 NA
## 10 Waikiki_HA USC00519397 2017-10-16 5 30 NA
## 11 Waikiki_HA USC00519397 2017-10-18 0 29.4 NA
## 12 Waikiki_HA USC00519397 2017-10-20 13 30.6 NA
## 13 Waikiki_HA USC00519397 2017-10-21 0 30 NA
## 14 Waikiki_HA USC00519397 2017-10-22 0 30 NA
## 15 Waikiki_HA USC00519397 2017-12-22 0 26.7 NA
# Document-wide setup: attach the tidyverse, fix the default figure size for
# every chunk, and make theme_bw() with a bottom legend the default theme.
library(tidyverse)

# The string values must use straight quotes; typographic quotes do not parse.
knitr::opts_chunk$set(
  fig.width = 6,
  fig.asp = .6,
  out.width = "90%"
)

theme_set(theme_bw() + theme(legend.position = "bottom"))
There are ways to set color preferences globally as well (for example, to use viridis color palettes everywhere), although they’re a bit more involved.
# Split out one data frame per station so each can feed its own geom.
central_park <- filter(weather_df, name == "CentralPark_NY")
waikiki <- filter(weather_df, name == "Waikiki_HA")
# Waikiki is drawn as points; Central Park is overlaid as a line by handing
# that geom its own dataset (the aesthetic mappings are inherited).
waikiki %>%
  ggplot(aes(x = date, y = tmax, color = name)) +
  geom_point() +
  geom_line(data = central_park)
## Warning: Removed 3 rows containing missing values (geom_point).
(brief aside about colors)
# Deliberate counter-example: inside aes(), "red" is treated as a data value
# to be mapped through the color scale, not as the literal color of the points.
ggplot(waikiki, aes(x = date, y = tmax, color = "red")) +
  geom_point()
## Warning: Removed 3 rows containing missing values (geom_point).
# Correct version: set the color as a fixed geom argument, outside aes().
ggplot(waikiki, aes(x = date, y = tmax)) +
  geom_point(color = "red", alpha = 0.5)
## Warning: Removed 3 rows containing missing values (geom_point).
# Three stand-alone plots, stored so they can be combined into one figure.

# tmax against tmin for all stations.
ggp_scatter <-
  ggplot(weather_df, aes(x = tmin, y = tmax)) +
  geom_point()

# Distribution of tmin across all stations.
ggp_density <-
  ggplot(weather_df, aes(x = tmin)) +
  geom_density()

# tmax by station.
ggp_box <-
  ggplot(weather_df, aes(x = name, y = tmax, color = name)) +
  geom_boxplot()
Use the patchwork package to put plots together.
# Scatter and density side by side on the top row, boxplot underneath.
# NOTE(review): these operators need patchwork to be loaded -- confirm it is
# attached in the setup chunk.
(ggp_scatter + ggp_density) /
  ggp_box
## Warning: Removed 15 rows containing missing values (geom_point).
## Warning: Removed 15 rows containing non-finite values (stat_density).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
factor variables
# Boxplot of tmax by station, with the station order set by hand:
# Waikiki first, Central Park second, remaining levels after that.
weather_df %>%
  mutate(
    name = fct_relevel(factor(name), "Waikiki_HA", "CentralPark_NY")
  ) %>%
  ggplot(aes(x = name, y = tmax, color = name)) +
  geom_boxplot()
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
# Boxplot of tmax by station, with stations ordered by their median tmax.
weather_df %>%
  mutate(
    name = factor(name),
    # tmax contains missing values; without na.rm = TRUE the median for such
    # a station is NA and its position depends on where NAs happen to sort.
    # Extra arguments are forwarded to the summary function (median).
    name = fct_reorder(name, tmax, na.rm = TRUE)
  ) %>%
  ggplot(aes(x = name, y = tmax, color = name)) +
  geom_boxplot()
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
When creating a scatterplot, bringing “Central Park” to the front instead of “Waikiki” requires reordering the rows of the dataset itself; using “fct_reorder” (factor reordering) alone is not enough, because points are drawn in the order of the rows.
# Stack tmax and tmin into one long "temperature" column so both
# distributions can be overlaid in a single density plot per station.
weather_long <- pivot_longer(
  weather_df,
  cols = c(tmax, tmin),
  names_to = "observation",
  values_to = "temperature"
)

ggplot(weather_long, aes(x = temperature, fill = observation)) +
  geom_density(alpha = 0.5) +
  # One panel per station, laid out in columns.
  facet_grid(. ~ name) +
  theme(legend.position = "bottom")
## Warning: Removed 18 rows containing non-finite values (stat_density).
As a final example, we’ll revisit the FAS data. We’ve seen code for data import and organization and for joining the litters and pups data. Here we add some data tidying steps to view pup-level outcomes (post-natal day on which ears “work”, on which the pup can walk, etc) across values of dose category and treatment day.
# Pup-level data: one character column followed by five integer columns.
pup_data <-
  read_csv("./data/FAS_pups.csv", col_types = "ciiiii") %>%
  janitor::clean_names() %>%
  mutate(
    # Replace the numeric sex codes with readable labels.
    sex = recode(sex, `1` = "male", `2` = "female")
  )
# Litter-level data: drop pups_survive, split the group label into dose and
# treatment day, and compute weight gain between gd0 and gd18.
litter_data <-
  read_csv("./data/FAS_litters.csv", col_types = "ccddiiii") %>%
  janitor::clean_names() %>%
  select(-pups_survive) %>%
  # sep = 3 splits after the third character: the first three characters
  # become `dose`, the remainder becomes `day_of_tx`.
  separate(group, into = c("dose", "day_of_tx"), sep = 3) %>%
  mutate(wt_gain = gd18_weight - gd0_weight) %>%
  mutate(day_of_tx = as.numeric(day_of_tx))
# Attach litter-level information to every pup, matching on litter_number.
fas_data <- pup_data %>%
  left_join(litter_data, by = "litter_number")
# Violin plots of the post-natal day on which each developmental outcome is
# reached, by dose, faceted by treatment day (rows) and outcome (columns).
fas_data %>%
  select(sex, dose, day_of_tx, pd_ears:pd_walk) %>%
  # One row per pup and outcome.
  pivot_longer(
    pd_ears:pd_walk,
    names_to = "outcome",
    values_to = "pn_day"
  ) %>%
  drop_na() %>%
  # Order the outcome panels by the median post-natal day on which they occur.
  # Ordering by day_of_tx (as before) is uninformative: it is a litter-level
  # value, not something that distinguishes one outcome from another.
  mutate(outcome = forcats::fct_reorder(outcome, pn_day, median)) %>%
  ggplot(aes(x = dose, y = pn_day)) +
  geom_violin() +
  facet_grid(day_of_tx ~ outcome)
This is more of a data-tidying problem than a ggplot problem.